About

In this markdown, we will analyse the networks of TF/target gene cis-regulatory interactinos generated using ANANSE.

We will load these networks as tabular data that we will transform into igraph graph objects, that we can parse and annotate using our information on TF classes and other similar functional annotation that we have carried out over the course of this project.

Load libraries

library(DESeq2)
library(limma)
library(ComplexHeatmap)
library(circlize)
library(ggplot2)
library(igraph)

Load functions

source("code/r_code/r_functions/sourcefolder.R")

sourceFolder(
  "code/r_code/r_functions",
  recursive = TRUE
  )

sourceFolder(
  "code/r_code/r_general",
  recursive = TRUE
  )

Load previous Data

We will need some of the information we have generated in our previous analyses such as stage-specific cluster information and Transcription factor annotation.

# RNA clusters
load("outputs/rda/stage_specific_clusters.rda")

# Transcription Factor information
load("outputs/rda/TF_annotation.rda")

Load and prepare information for graph analysis

Here we will load all the remaining information of our interest and we will transform it into a format compliant to some of the functions we have made to annotate the graphs; basically, tabular data with gene id <–> annotation category.

# Transcription factors

allTFclasses_col <- c(topclasses_col,otherclasses_col)

pfla_tfs_graph_analysis <-
  merge(
    pfla_tfs,
    data.frame(
      TFclass = 
        c(names(topclasses_col),names(otherclasses_col)),
      col = 
        c(topclasses_col,otherclasses_col)
      ),
    by.x = 2,
    by.y = 1
  )[,c(2,1,3)]

colnames(pfla_tfs_graph_analysis) <- c("id","TFclass","col")

# Effector genes
pfla_tfeg <- data.frame(
  id = allgenes,
  TFEG = ifelse(allgenes %in% pfla_tfs$id, "TF", "EG")
)

# Functional categories (COG)
pfla_funcat <- read.table(
  "/home/ska/aperpos/projects/ptychodera_cisreg_development/outputs/functional_annotation/COGs/pfla_cogs.tsv",
  col.names = c("id","funcat")
)

# Trans-developmental / housekeeping
pfla_td_nohk <- 
  unique(
    read.table(
      "outputs/functional_annotation/TD_annotation/transdev_expanded_nohk.tsv",
      header=F,
      stringsAsFactors = F)[,1]
    )

pfla_hk_notd <- 
  unique(
    read.table(
      "outputs/functional_annotation/TD_annotation/housekeep_expanded_notd.tsv",
      header=F,
      stringsAsFactors=F)[,1]
    )

pfla_tdhk <- data.frame(
  id = allgenes,
  TDHK =  
    ifelse(allgenes %in% pfla_td_nohk, "td",
    ifelse(allgenes %in% pfla_hk_notd, "hk", 
    "none")
    )
  )

Finally, we add them into a list that our function for annotation will iterate through. We will call this the list of attributes

pfla_attributes_list <- 
  list(
    pfla_tfeg,
    pfla_tfs_graph_analysis,
    pfla_funcat,
    pfla_tdhk
    )

We will also load the GO annotation of Ptychodera to use it later on in the functions.

#gene universe
gene_universe <- allgenes

# gene-GO mappings
pfla_id_GO <-
  readMappings(
    "outputs/functional_annotation/go_blast2go/GO_annotation.txt"
  )

Likewise, we will also load the EggNOG annotation of gene names for more clarity in the plots of the graphs.

pfla_genenames <-
  read.delim2(
    file = "outputs/functional_annotation/eggnog/emapper.annotations",
    skip = 3,
    header = TRUE
  )[,c(1,5,13)]
pfla_genenames <- pfla_genenames[pfla_genenames$predicted_gene_name != "",]

Load ANANSE outputs

Here we will use the function LoadNetworkData that reads the .network file replacing instances of em dash and other ANANSE-related quirks. The resulting dataframe has three columns: “tf” (transcription factor), “tg” (target gene), and “prob” (probability). We manually add back underscores to the gene ids because ANANSE required the gene ids in a different format.

We do this for both the Early Blastula (EB) and Late Gastrula (LG) networks.

# EARLY BLASTULA
pfla_EB_nw <- LoadNetworkData("outputs/ananse/pfla_EB.network")
## Warning in readLines(x): incomplete final line found on
## 'outputs/ananse/pfla_EB.network'
pfla_EB_nw$tf <- gsub("TCONS", "TCONS_", pfla_EB_nw$tf)
pfla_EB_nw$tg <- gsub("TCONS", "TCONS_", pfla_EB_nw$tg)

# LATE GASTRULA
pfla_LG_nw <- LoadNetworkData("outputs/ananse/pfla_LG.network")
pfla_LG_nw$tf <- gsub("TCONS", "TCONS_", pfla_LG_nw$tf)
pfla_LG_nw$tg <- gsub("TCONS", "TCONS_", pfla_LG_nw$tg)

Here it is what it looks like.

head(pfla_EB_nw)
##                tf             tg       prob
## 1: TCONS_00000165 TCONS_00000001 0.13640409
## 2: TCONS_00000165 TCONS_00000003 0.11046195
## 3: TCONS_00000165 TCONS_00000004 0.10880337
## 4: TCONS_00000165 TCONS_00000006 0.09827907
## 5: TCONS_00000165 TCONS_00000007 0.09742769
## 6: TCONS_00000165 TCONS_00000008 0.17221461

Early Blastula

The first thing we will do is filter the network and keep only those interactions whose values are in the top 5% percentile. We will use this later on to quantify and measure the In- and Out-Degree of each gene, among other things.

pfla_EB_nw2 <- FilterNetwork(pfla_EB_nw,q=0.95)
## [1] "Original network is 35854 genes. Filtered network is 8256 genes."

We generate an igraph object using our custom wrapper GenerateNetwork that also filter by the same number of interactions, since we provide q = 0.95.

pfla_EB_graph <- GenerateNetwork(pfla_EB_nw,q = 0.95)

Using our list of attributes, we add this information to all the pertinent nodes (== genes) of our network using our ParseNetwork function.

pfla_EB_parsenetwork <- ParseNetwork(pfla_EB_graph, pfla_attributes_list)

The resulting object can be subsetted to extract the new network with attributes on one object and the data frame of attributes on the other.

pfla_EB_graph2 <- pfla_EB_parsenetwork[[1]]
pfla_EB_df_attr <- pfla_EB_parsenetwork[[2]]

The data frame of attributes is a data.frame object sorted in the same order as nodes are indexed in the igraph object. The content of this data frame is, for every node that has an attribute, a row with the id of this node, the index of this node, and a bunch of attributes (color, whether it is a trans-developmental gene, TF class, functional category, etc.)

head(pfla_EB_df_attr)
##               id index TFEG     TFclass             col funcat TDHK
## 1 TCONS_00001049     1   TF       T-box         #f38d97      K none
## 2 TCONS_00001141     2   TF     C2H2_ZF         #ffebb5      K none
## 3 TCONS_00001174     3   TF        Runt lightgoldenrod2      Z none
## 4 TCONS_00001304     4   TF     C2H2_ZF         #ffebb5      O   td
## 5 TCONS_00001709     5   TF Homeodomain         #5a5a82        none
## 6 TCONS_00001951     6   TF         Pou     chartreuse2      K none

Finally, we use our wrapper NetworkStats.

This wrapper performs a number of calculations using our igraph object and the tabular data frame, including also the gene ontology of target genes, and also reports the top central genes, the centrality of different kinds of genes by attribute (e.g. centrality of genes by TF class).

The inputs of the function are:

The function outputs a list containing various statistics and metrics of the network, such as the number of genes, the number of active TFs, the top emitters and receivers of the network, the number of self-regulated TFs, the size of the connected components of the graph, the centrality of the TFs and the functional categories, and the gene ontology information.

pfla_EB_stats <- NetworkStats(pfla_EB_nw2, pfla_EB_graph2, pfla_EB_df_attr, N = 10, gene_universe = gene_universe, id2go = pfla_id_GO)
## [1] "Basic Stats"
## [1] "Components"
## [1] "13"
## [1] "Centrality and category-based metrics"
## [1] "Centrality"
## [1] "Centrality of TFs"
## [1] "Centrality of Trans-Devs"
## [1] "Edges of Functional Categories"
## [1] "Centrality of Functional Categories"
## [1] "Getting Gene Ontology"
## [1] "Starting analysis 1 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 2 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 3 of 5"
## Warning in getSigGroups(object, test.stat): No enrichment can pe performed -
## there are no feasible GO terms!
## [1] "Starting analysis 4 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 5 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Generating output"

Late Gastrula

Next we do the same for the Late Gastrula Network. We start by filtering the Network:

pfla_LG_nw2 <- FilterNetwork(pfla_LG_nw,q=0.95)
## [1] "Original network is 35854 genes. Filtered network is 8543 genes."

Then we also generate an igraph network using the same percentile.

pfla_LG_graph <- GenerateNetwork(pfla_LG_nw,q = 0.95)

We parse this graph using our list of attributes

pfla_LG_parsenetwork <- ParseNetwork(pfla_LG_graph, pfla_attributes_list)

pfla_LG_graph2 <- pfla_LG_parsenetwork[[1]]
pfla_LG_df_attr <- pfla_LG_parsenetwork[[2]]

And finally we calculate the stats and metrics of this network:

pfla_LG_stats <- NetworkStats(pfla_LG_nw2, pfla_LG_graph2, pfla_LG_df_attr, N = 10, gene_universe = gene_universe, id2go = pfla_id_GO)
## [1] "Basic Stats"
## [1] "Components"
## [1] "24"
## [1] "Centrality and category-based metrics"
## [1] "Centrality"
## [1] "Centrality of TFs"
## [1] "Centrality of Trans-Devs"
## [1] "Edges of Functional Categories"
## [1] "Centrality of Functional Categories"
## [1] "Getting Gene Ontology"
## [1] "Starting analysis 1 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 2 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 3 of 5"
## Warning in getSigGroups(object, test.stat): No enrichment can pe performed -
## there are no feasible GO terms!
## [1] "Starting analysis 4 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Starting analysis 5 of 5"
## Warning in rev(-log(as.numeric(allres_x$classicFisher[1:maxgenesplot]) * : NAs
## introduced by coercion
## [1] "Generating output"

Comparing the networks

We will have a look at the output of these wrappers a bit later. We will now compare these two networks using the NetworkStats and the filtered tab-format networks that we have generated.

Part of this comparison involves defining a subset of genes of interest, which we load here.

genes_postgr <- 
  rownames(pfla_rna_dev)[
    pfla_rna_dev$cID > 12
    ]

And another part is loading the results of running ANANSE influence, which takes into account differential gene expression to infer the responsible factors for changes in the networks of cis-regulatory interactions between two states (in our case, the transition from early blastula to late gastrula).

ananse_EB_to_LG <- 
  read.table("/home/ska/aperpos/Def_Pfla/outputs/ananse/20210909_ANANSE_EG_EB",header=T) #change this path

ananse_EB_to_LG$factor <-
  sub("TCONS","TCONS_",ananse_EB_to_LG$factor)

The compareNetworks wrapper compares two networks, nw_a and nw_b, and their respective graphs, graph_a and graph_b, and outputs various metrics, visualizations, and data frames.

The function requires the following arguments:

The function performs the following tasks:

pdf(file = "graphics/EB_LG_comparenetworks.pdf",he=8,wi=10)
EB_vs_LG <- compareNetworks(
  name_network_a = "EB",
  name_network_b = "LG",
  nw_a = pfla_EB_nw2,
  nw_b = pfla_LG_nw2,
  top = 0.9,
  stats_a = pfla_EB_stats,
  stats_b = pfla_LG_stats,
  graph_a = pfla_EB_graph2,
  graph_b = pfla_LG_graph2,
  influence = ananse_EB_to_LG,
  tfs = pfla_tfs_graph_analysis,
  geneset_interest = genes_postgr,
  id2go = pfla_id_GO,
  gene_universe = gene_universe,
  col_a = "#efaa90",
  col_b = "#fbcf99"
)
## [1] "Defining exclusive and common targets"
## [1] "getting GO terms"
## [1] "Starting analysis 1 of 3"
## [1] "Starting analysis 2 of 3"
## [1] "Starting analysis 3 of 3"
## [1] "Dataframe of various metrics"
## [1] "Gene Behaviour across networks"
## [1] "Gene centrality of ALL common genes across networks"
## [1] "Gene centrality of TFs across networks"
## [1] "Gene centrality of trans-dev genes across networks"
## [1] "Graph generation - TF genes"
## [1] "Graph generation - Trans-dev genes"
## [1] "Plots"
## [1] "Mean connections per TF gene"
## [1] "Number of self-regulating TFs"
## [1] "connections per target gene"
## [1] "Venn Diagram Plot"
## [1] "Number of genes of interest in targets"
## [1] "Gene Behaviour plot"
## [1] "Gene Behaviour Box plot"
## [1] "Changes in centrality for ALL genes"
## [1] "Changes in centrality for TF genes"
## [1] "Foldchange in centrality of TFs"
## [1] "graph plots of TFs that change"
## [1] "Changes in trans-dev centrality across networks"
## [1] "Foldchange % emitted connections per gene"
## [1] "graph plots of trans-dev that change"
## [1] "Influence"
## [1] "Influence graph"
## Warning in length(vattrs[[name]]) <- vc: length of NULL cannot be changed
## [1] "Plot influence graph"
dev.off()
## png 
##   2

Network plots

We can use the wrapper NetworkPlots to generate ALL of these plots together. The CompareNetworks wrapper will also generate plots into a file if we run the whole function inside a call to a pdf device, as done above.

pdf(file = "graphics/EB_graph_plots.pdf",he=20,wi=20)

NetworkPlots(
  stats = pfla_EB_stats,
  nw = pfla_EB_nw,
  tfcol = allTFclasses_col,
  layout = TRUE,
  pdf = F
)

dev.off()
## png 
##   2
pdf(file = "graphics/LG_graph_plots.pdf",he=20,wi=20)

NetworkPlots(
  stats = pfla_LG_stats,
  nw = pfla_LG_nw,
  tfcol = allTFclasses_col,
  layout = TRUE,
  pdf = F
)

dev.off()
## png 
##   2

Although these plots have been saved in disk, for clarity, we will explore these analyses by re-plotting them here. We will define the constants of colors.

col_a = "#efaa90"
col_b = "#fbcf99"

We start by having a look at the number of connections per target gene. This is done by counting the number of instances of each gene in the “target” column of the tabular networks.

# num connections per target gene
boxplot(
  main = "connections per\ntarget gene",
  list(
    c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_a),
    c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_b)
  ),
  names = c("\nEarly\nBlastula", "\nLate\nGastrula"),
  col = c(col_a,col_b),
  sub = paste0(
    "Wilcox p.value ",
    wilcox.test(
      x = c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_a),
      y = c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_b)
    )$p.value
  )
)

We can also have a look at the number of TFs that are self-regulated on each network by counting the number of times a given TF gene appears as a target of itself in the tabular networks.

# Number of self-reg TFs
barplot(
  main = "Number of\nself-regulating TFs",
  height = c(
    EB_vs_LG$comparison_table$a[EB_vs_LG$comparison_table$metric == "Num Self-Regulated TFs"],
    EB_vs_LG$comparison_table$b[EB_vs_LG$comparison_table$metric == "Num Self-Regulated TFs"]
  ),
  names.arg = c("Early\nBlastula", "Late\nGastrula"),
  col = c(col_a,col_b),
)

How different are these networks at the target level? We can dig into this by looking at how overlapped these networks are, and at the enriched GO terms in the exclusive and commonly-shared target genes of each network.

# number of shared targets
x <- 
  list(
    EB = c(
      EB_vs_LG$target_genes$targets_common_ab,
      EB_vs_LG$target_genes$targets_exclusive_a
      ), 
    LG = c(
      EB_vs_LG$target_genes$targets_common_ab,
      EB_vs_LG$target_genes$targets_exclusive_b
      )
  )

# Helper function to display Venn diagram
display_venn <- function(x, ...){
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...)
  grid.draw(venn_object)
}

# Display the plot directly in R:
display_venn(
  x,
  # Circles
  lwd = 2,
  lty = 'blank',
  fill = c(col_a, col_b),
  # Numbers
  cex = .9,
  fontface = "italic",
  # Set names
  category.names = c("EB" , "LG"),
  cat.cex = 1,
  cat.pos = 180,
  cat.fontface = "bold",
  cat.default.pos = "outer"
  )

Here we have the enriched GO terms for the target genes exclusive to EB, the target genes exclusive to LG, and the common ones.

# GO barplots or just the words for them
EB_vs_LG$GOs_targets$GOtable$targets_exclusive_a
##         GO.ID
## 1  GO:0045892
## 2  GO:0009953
## 3  GO:0045893
## 4  GO:0000122
## 5  GO:1904668
## 6  GO:0045944
## 7  GO:1904589
## 8  GO:1903715
## 9  GO:0034622
## 10 GO:0071481
## 11 GO:0060425
## 12 GO:0060027
## 13 GO:0048286
## 14 GO:0032436
## 15 GO:0014834
## 16 GO:0007362
## 17 GO:0021953
## 18 GO:0006457
## 19 GO:0035019
## 23 GO:0007476
## 24 GO:0021697
## 25 GO:0045648
## 26 GO:0007378
## 27 GO:1901987
## 28 GO:0048103
## 29 GO:0071786
## 30 GO:0030335
##                                                                                   Term
## 1                                  negative regulation of transcription, DNA-templated
## 2                                                     dorsal/ventral pattern formation
## 3                                  positive regulation of transcription, DNA-templated
## 4                            negative regulation of transcription by RNA polymerase II
## 5                             positive regulation of ubiquitin protein ligase activity
## 6                            positive regulation of transcription by RNA polymerase II
## 7                                                         regulation of protein import
## 8                                                    regulation of aerobic respiration
## 9                                         cellular protein-containing complex assembly
## 10                                                          cellular response to X-ray
## 11                                                                  lung morphogenesis
## 12                                       convergent extension involved in gastrulation
## 13                                                           lung alveolus development
## 14    positive regulation of proteasomal ubiquitin-dependent protein catabolic process
## 15 skeletal muscle satellite cell maintenance involved in skeletal muscle regeneration
## 16                                                       terminal region determination
## 17                                       central nervous system neuron differentiation
## 18                                                                     protein folding
## 19                                            somatic stem cell population maintenance
## 23                                            imaginal disc-derived wing morphogenesis
## 24                                                         cerebellar cortex formation
## 25                                  positive regulation of erythrocyte differentiation
## 26                                                               amnioserosa formation
## 27                                           regulation of cell cycle phase transition
## 28                                                          somatic stem cell division
## 29                                  endoplasmic reticulum tubular network organization
## 30                                               positive regulation of cell migration
##    Annotated Significant Expected classicFisher
## 1        919          38    12.66     0.0000071
## 2        263          17     3.62     0.0000071
## 3       1202          43    16.56     0.0000162
## 4        625          23     8.61     0.0000193
## 5         13           4     0.18     0.0000228
## 6        958          30    13.20     0.0000234
## 7        108           9     1.49     0.0000474
## 8         30           5     0.41     0.0000513
## 9       1068          31    14.72     0.0000709
## 10        17           4     0.23     0.0000727
## 11        52           6     0.72     0.0000772
## 12        33           5     0.45     0.0000826
## 13        53           6     0.73     0.0000861
## 14       103           8     1.42     0.0000895
## 15         8           3     0.11       0.00014
## 16        20           4     0.28       0.00014
## 17       207          11     2.85       0.00015
## 18       209          11     2.88       0.00016
## 19        60           6     0.83       0.00017
## 23       289          13     3.98       0.00020
## 24        22           4     0.30       0.00021
## 25        23           4     0.32       0.00025
## 26        10           3     0.14       0.00029
## 27       522          22     7.19       0.00030
## 28        93           7     1.28       0.00030
## 29        11           3     0.15       0.00039
## 30       353          14     4.86       0.00041
EB_vs_LG$GOs_targets$GOtable$targets_exclusive_b
##         GO.ID                                                      Term
## 1  GO:0003310                         pancreatic A cell differentiation
## 2  GO:0042472                                   inner ear morphogenesis
## 4  GO:0048485                    sympathetic nervous system development
## 5  GO:0045229             external encapsulating structure organization
## 6  GO:0021877                          forebrain neuron fate commitment
## 7  GO:0030335                     positive regulation of cell migration
## 8  GO:0045665             negative regulation of neuron differentiation
## 9  GO:0000122 negative regulation of transcription by RNA polymerase II
## 10 GO:0030500                         regulation of bone mineralization
## 11 GO:0008347                                      glial cell migration
## 12 GO:0001764                                          neuron migration
## 13 GO:0021987                               cerebral cortex development
## 14 GO:0014898          cardiac muscle hypertrophy in response to stress
## 15 GO:0003334                                  keratinocyte development
## 16 GO:0042130               negative regulation of T cell proliferation
## 17 GO:0030049                                   muscle filament sliding
## 18 GO:0051668                              localization within membrane
## 19 GO:0021854                                  hypothalamus development
## 20 GO:0030199                              collagen fibril organization
## 21 GO:0060021                                 roof of mouth development
## 29 GO:0061031                  endodermal digestive tract morphogenesis
## 30 GO:0001839                                neural plate morphogenesis
##    Annotated Significant Expected classicFisher
## 1          6           4     0.08    0.00000045
## 2        150          12     1.99    0.00000078
## 4         19           5     0.25    0.00000396
## 5        347          17     4.61    0.00000420
## 6         12           4     0.16    0.00001382
## 7        353          16     4.69    0.00002121
## 8        250          13     3.32    0.00003130
## 9        625          22     8.30    0.00003278
## 10        65           8     0.86    0.00004611
## 11        74           7     0.98    0.00005641
## 12       187          14     2.48    0.00005925
## 13       130           9     1.73    0.00006045
## 14        17           4     0.23    0.00006305
## 15         7           3     0.09    0.00007786
## 16        37           5     0.49       0.00012
## 17         8           3     0.11       0.00012
## 18       551          19     7.32       0.00015
## 19        21           4     0.28       0.00015
## 20        21           4     0.28       0.00015
## 21        88           7     1.17       0.00017
## 29         9           3     0.12       0.00018
## 30         9           3     0.12       0.00018
EB_vs_LG$GOs_targets$GOtable$targets_common_ab
##         GO.ID
## 1  GO:0051668
## 2  GO:0032525
## 3  GO:0045944
## 4  GO:0048025
## 5  GO:0000381
## 6  GO:0048147
## 7  GO:0008103
## 8  GO:0008285
## 9  GO:0035722
## 10 GO:0007430
## 11 GO:0000184
## 12 GO:0001745
## 13 GO:0000122
## 16 GO:0002181
## 17 GO:0048469
## 18 GO:0051128
## 19 GO:0021884
## 20 GO:0032482
## 21 GO:0006413
## 22 GO:1903715
## 23 GO:0007391
## 25 GO:0048264
## 26 GO:0035019
## 27 GO:0006614
## 28 GO:0015988
## 29 GO:0001708
## 30 GO:0046716
##                                                                               Term
## 1                                                     localization within membrane
## 2                                         somite rostral/caudal axis specification
## 3                        positive regulation of transcription by RNA polymerase II
## 4                            negative regulation of mRNA splicing, via spliceosome
## 5                         regulation of alternative mRNA splicing, via spliceosome
## 6                                  negative regulation of fibroblast proliferation
## 7                                     oocyte microtubule cytoskeleton polarization
## 8                             negative regulation of cell population proliferation
## 9                                        interleukin-12-mediated signaling pathway
## 10                                        terminal branching, open tracheal system
## 11             nuclear-transcribed mRNA catabolic process, nonsense-mediated decay
## 12                                                      compound eye morphogenesis
## 13                       negative regulation of transcription by RNA polymerase II
## 16                                                         cytoplasmic translation
## 17                                                                 cell maturation
## 18                                   regulation of cellular component organization
## 19                                                    forebrain neuron development
## 20                                                 Rab protein signal transduction
## 21                                                        translational initiation
## 22                                               regulation of aerobic respiration
## 23                                                                  dorsal closure
## 25                                               determination of ventral identity
## 26                                        somatic stem cell population maintenance
## 27                     SRP-dependent cotranslational protein targeting to membrane
## 28 energy coupled proton transmembrane transport, against electrochemical gradient
## 29                                                         cell fate specification
## 30                                                muscle cell cellular homeostasis
##    Annotated Significant Expected    classicFisher
## 1        551          43    12.04 0.00000000000045
## 2         10           6     0.22 0.00000002048785
## 3        958          48    20.94 0.00000035355660
## 4         23           7     0.50 0.00000040984365
## 5         77          11     1.68 0.00000086856158
## 6         17           6     0.37 0.00000106012818
## 7         18           6     0.39 0.00000156095350
## 8        593          46    12.96 0.00000164491195
## 9         29           7     0.63 0.00000232979446
## 10        20           6     0.44 0.00000314044834
## 11        77          10     1.68 0.00000661803964
## 12       331          28     7.24 0.00000732077374
## 13       625          32    13.66 0.00000788494280
## 16       121          12     2.64 0.00001381648665
## 17       400          28     8.74 0.00001832157318
## 18      2658         111    58.10 0.00002283902845
## 19        41           7     0.90 0.00002677107716
## 20        42           7     0.92 0.00003152709924
## 21       155          13     3.39 0.00003698330703
## 22        30           6     0.66 0.00003998130526
## 23       114          11     2.49 0.00004064315736
## 25        10           4     0.22 0.00004254711440
## 26        60           8     1.31 0.00004601128098
## 27        60           8     1.31 0.00004601128098
## 28        20           5     0.44 0.00005754160947
## 29       227          25     4.96 0.00006058219898
## 30        64           8     1.40 0.00007376842136

How are these changes in target genes achieved? What is happening at the factor level?

For this we can look at the changes in centrality for different genes across networks.

# plots of correlation changes in centrality tfs and transdev
par(mfrow = c(1,2))
plot(
  main = "Changes in TF centrality across networks",
  x = EB_vs_LG$tf_centrality_across_networks$a,
  y = EB_vs_LG$tf_centrality_across_networks$b,
  xlab = "Early Blastula",
  ylab = "Late Gastrula",
  pch = 19,
  cex = 0.75,
  col = alpha("#E58745",0.25)
)

plot(
  main = "Foldchange in centrality of TFs",
  sort(
    EB_vs_LG$tf_centrality_across_networks$b /
    EB_vs_LG$tf_centrality_across_networks$a
    ),
  col = alpha("#E58745",0.5),
  ylab = "foldchange centrality"
)

par(mfrow = c(1,1))

In this case we see a large number of genes tend to acquire higher levels of centrality in LG compared to EB, as shown in the scatter plot and the plot of the foldchange.

And the same about trans-dev genes:

par(mfrow = c(1,2))
# same in transdev
plot(
  main = "Changes in trans-dev centrality across networks",
  x = EB_vs_LG$transdev_centrality_across_networks$a,
  y = EB_vs_LG$transdev_centrality_across_networks$b,
  xlab="Early Blastula",
  ylab = "Late Gastrula",
  pch = 19,
  cex = 0.75,
  col = alpha("#1A9A83",0.25)
)

plot(
  main = "Foldchange in centrality of trans-devs",
  sort(
    EB_vs_LG$transdev_centrality_across_networks$b /
      EB_vs_LG$transdev_centrality_across_networks$a
  ),
  col = alpha("#1A9A83",0.5),
  ylab = "foldchange centrality"
)

par(mfrow = c(1,1))

This is also reflected at the InvsOut-Degree of genes, what we call “gene behaviour”, and how it changes between networks. This can be visualised as cumulative density functions and/or boxplots.

eb_inout_ratio <- 
  pfla_EB_stats$In_Out_per_Gene$ratio[
    pfla_EB_stats$In_Out_per_Gene$ratio > 0
    ]

lg_inout_ratio <- 
  pfla_LG_stats$In_Out_per_Gene$ratio[
    pfla_LG_stats$In_Out_per_Gene$ratio > 0
  ]

plot(
  seq(1:length(eb_inout_ratio)),
  eb_inout_ratio,
  main = "Gene behavior\n(types & amount\nof connections)",
  col = col_a,
  type = "l",
  xlab = "",
  ylab = "% emitting connections",
  lwd = 2,
  ylim = c(0,1)
)

lines(
  seq(1:79),
  lg_inout_ratio[1:79],
  main = "Gene behavior\n(types & amt of connections)",
  col = col_b,
  lwd = 2
)

To have a better idea of what are these TF genes, and since we saw that there is a TF switch at the transcriptional level during development (see previous markdowns), we can showcase the changes in the network structure by looking at the centrality of the different genes by TF class.

In this case we use the function plot_centrailty_TFclass that grabs a named list with teh centrality values of TF genes grouped by class and creates a barplot. Colors are assigned based on the named vector with TFclasses and color.

## Centrality per TF class

par(mfrow = c(2,1))
plot_centrality_TFclass(
  s = pfla_EB_stats$Centrality_per_TFclass,
  f = allTFclasses_col,
  # sub_s = topclasses,
  main = "Centrality per TF Class - EB",
  ylim = c(0.00005,0.00012)
)

plot_centrality_TFclass(
  s = pfla_LG_stats$Centrality_per_TFclass,
  f = allTFclasses_col,
  # sub_s = topclasses,
  main = "Centrality per TF Class - LG",
  ylim = c(0.00005,0.00012)
)

par(mfrow = c(1,1))

Another way to check for differences between target genes in networks is looking at the presence of certain genes of interest in each. For example, we can check how many genes of larval development are being targeted in each of the networks. We see there is a large different in the proportion of larval genes that are embedded in the LG network compared to the EB network. This is hinting at the aforementioned switch towards larval development already taking place at the cis-regulatory level during gastrulation.

# number of post-gastrulation genes in network

barplot(
  main="Post-gastrulation genes\nin network",
  height=c(
    EB_vs_LG[[11]][1]/length(EB_vs_LG$target_genes$targets_exclusive_a)*100,
    EB_vs_LG[[11]][2]/length(EB_vs_LG$target_genes$targets_exclusive_b)*100
  ),
  col = c(
    col_a,
    col_b
  ),
  names=c("EB", "LG"),
  ylab="gene percent",
  ylim = c(0,65),
  las=1
)

Finally, to check the differences between the networks of these developmental stages, we can use the output of ANANSE influence to check for the top factors. Below we re-run the code used to generate the influence plot in compareNetworks().

# influence
influ_tbl <- ananse_EB_to_LG 
influ_tbl <- merge(influ_tbl,pfla_tfs_graph_analysis,by.x=1,by.y=1,all.x=T)
influ_tbl$TFclass[is.na(influ_tbl$TFclass)] <- " "
influ_tbl$col[is.na(influ_tbl$col)] <- "gray"
influ_tbl <- influ_tbl [rev(order(influ_tbl$sumScaled)),]
rownames(influ_tbl) <- NULL

# 'translate' the gene ids to known gene names using a small function
influ_tbl$genename <- 
  translate_ids(x = influ_tbl$factor,dict = pfla_genenames)

Here the plot of ANANSE influence. Colors indicate different TF classes. Where available, gene names have been added.

plot(
  influ_tbl$factor_fc,
  influ_tbl$sumScaled,
  pch=19,
  col=influ_tbl$col,
  bg="black",
  xlab="log2fold change of TF",
  ylab="ANANSE influence score",
  main="Main factors",
  bty="n",
  xlim=c(0,max(influ_tbl$factor_fc)+1)
)
text(
  influ_tbl$factor_fc[1:20],
  influ_tbl$sumScaled[1:20],
  influ_tbl$genename[1:20],
  pos = 3,
  cex=0.85
)

And here is the plot of the graph of these TFs:

V(EB_vs_LG$influence_graph)$genename <- 
  translate_ids(V(EB_vs_LG$influence_graph)$name,pfla_genenames)

E(EB_vs_LG$influence_graph)$width <-
  category_by_quantile(
    E(EB_vs_LG$influence_graph)$prob,
    newvalues = c(0.2,1,2,5)
    )

plot(
  main = "Influence network",
  EB_vs_LG$influence_graph,
  vertex.color = V(EB_vs_LG$influence_graph)$col,
  vertex.label = V(EB_vs_LG$influence_graph)$genename,
  edge.width = E(EB_vs_LG$influence_graph)$width,
  edge.arrow.size = 0.2,
  edge.color = rgb(0,0,0,0.1),
  layout = layout_with_fr(EB_vs_LG$influence_graph)
)

Genes in In Situs

Here there will be some code to subset the graphs and get the genes of each developmental layer.

Networks from Other Organisms

Here there will be code to retrieve the subgraphs of genes from GRNs in other organisms.

Save the Data

We will save the data now:

save(
  # Attributes
  pfla_attributes_list,
  pfla_tfs_graph_analysis,
  pfla_tdhk,
  pfla_tfeg,
  pfla_funcat,
  # Early Blastula
  pfla_EB_nw,
  pfla_EB_nw2,
  pfla_EB_parsenetwork,
  pfla_EB_graph,
  pfla_EB_graph2,
  pfla_EB_stats,
  # Late Gastrula
  pfla_LG_nw,
  pfla_LG_nw2,
  pfla_LG_parsenetwork,
  pfla_LG_graph,
  pfla_LG_graph2,
  pfla_LG_stats,
  # Network comparison
  EB_vs_LG,
  file = "outputs/rda/graph_analysis.rda"
)